#--- REPLICATION CODE FOR: CONTACT AND CONTEXT: HOW MUNICIPAL TRAFFIC STOPS SHAPE CITIZEN CHARACTER
#--- AUTHORS: ALLISON ANOLL, DEREK EPP, AND MACKENZIE ISRAEL-TRUMMEL



## --- Load Required Packages -----------
library(car)
library(multilevel)
library(doBy)
library(ggplot2)
library(lme4)
library(lmerTest)
library(texreg)



# ---- Load Data ---------------------
# Notes:
# Loaded data includes survey data of Black and White respondents living in IL and NC
# Plus traffic stops data at the municipal level for IL and NC


#----- Set working directory to load data 
setwd("")


##Load data file
stops <- read.csv(file="stopsdata.csv") 
names(stops)



#----- Clean and Recode Variables ---------------------

## d001: number of times stopped by the police: pooling top two categories
table(stops$d001)
stops$stopped <- recode(stops$d001, "0=0; 1=1; 2=2; 3=2") 
table(stops$d001, stops$stopped)

## alternative codings of stops for descriptives and appendix: stopped at all, and most stopped

stops$stopped.bin[stops$stopped == 0] <- 0
stops$stopped.bin[stops$stopped > 0] <- 1
table(stops$stopped, stops$stopped.bin)


stops$moststopped[stops$stopped < 2] <- 0
stops$moststopped[stops$stopped ==2] <- 1
table(stops$stopped, stops$moststopped)



## make 0 equal rate of stops for whites and blacks instead of 1 for pctstopinvest_ratio 

summary(stops$pctstopinvest_ratio)
stops$pctstopinvest_ratio_zero <- stops$pctstopinvest_ratio - 1
summary(stops$pctstopinvest_ratio_zero)


##look at distribution of respondents in places more likely to stop whites versus places more likely to stop black drivers
summary(stops$pctstopinvest_ratio_zero)
stops$antiblack[stops$pctstopinvest_ratio_zero > 0] <- 1
stops$antiblack[stops$pctstopinvest_ratio_zero < 0.00001] <- 0
table(stops$pctstopinvest_ratio_zero, stops$antiblack)
prop.table(table(stops$antiblack)) ##0.91601344 in anti-black places




## i002: know someone convicted of felony
table(stops$i002)
stops$networkfel[stops$i002 == 0] <- 0 
stops$networkfel[stops$i002 == 1] <- 1 
stops$networkfel[stops$i002 > 1] <- 2 
table(stops$i002, stops$networkfel)


## c011: income
table(stops$c011)
stops$income <- stops$c011
table(stops$c011, stops$income)



## d011: felony conviction
table(stops$d011)
stops$felcon <- recode(stops$d011, "9=NA")
table(stops$felcon)



## a002: victim of crime
table(stops$a002)
stops$victim <- stops$a002
table(stops$a002, stops$victim)



## c014: ideology
table(stops$c014)
stops$ideo <- recode(stops$c014, "9=NA")
table(stops$ideo)



## education: edu
table(stops$edu) #only 3 people at highest education level. Recoded 5 to pool with 4.
stops$education <- recode(stops$edu, "5=4")
table(stops$edu, stops$education)



## gender
table(stops$woman)



## Variable for states
table(stops$state)
stops$Illinois[stops$state == 32] <- 0
stops$Illinois[stops$state == 12] <- 1
table(stops$Illinois)



## race: white or black
table(stops$race)
stops$white[stops$race == 1] <- 1
stops$white[stops$race == 2] <- 0
table(stops$race, stops$white)



## age
table(stops$age)



## evaluation of police 
table(stops$a005_1) #solving crime
table(stops$a005_2) #protecting ppl like you from crime
table(stops$a005_3) #treating racial/ethnic groups equally
table(stops$a005_4) #not using excessive force
table(stops$a005_5) #holding accountable for misconduct

stops$index <- stops$a005_1 + stops$a005_2 + stops$a005_3 + stops$a005_4 + stops$a005_5
table(stops$index)


##Cronbach's alpha on index of police evaluation
policeevaluation <- cbind(stops$a005_1, stops$a005_2, stops$a005_3, stops$a005_4, stops$a005_5)
cronbach(policeevaluation) 
##ALPHA =  0.9231771

## Voting and political participation

table(stops$cp001_1)
table(stops$cp001_2)
table(stops$cp001_3)
table(stops$cp001_4)
table(stops$cp001_5)
table(stops$cp001_6)
table(stops$cp001_7)


stops$voted <- recode(stops$cp001_1, "1=1; NA=0")
stops$meeting <- recode(stops$cp001_2, "1=1; NA=0")
stops$contacted <- recode(stops$cp001_3, "1=1; NA=0")
stops$donated <- recode(stops$cp001_4, "1=1; NA=0")
stops$campaigned <- recode(stops$cp001_5, "1=1; NA=0")
stops$petitioned <- recode(stops$cp001_6, "1=1; NA=0")
stops$protested <- recode(stops$cp001_7, "1=1; NA=0")


table(stops$cp001_1)
table(stops$voted)

table(stops$cp001_2)
table(stops$meeting)

table(stops$cp001_3)
table(stops$contacted)

table(stops$cp001_4)
table(stops$donated)

table(stops$cp001_5)
table(stops$campaigned)

table(stops$cp001_6)
table(stops$petitioned)

table(stops$cp001_7)
table(stops$protested)


stops$participation <- stops$meeting + stops$contacted + stops$donated + stops$campaigned + stops$petitioned + stops$protested 

table(stops$participation)

## collapse 3 or more acts together
stops$participation4 <- recode(stops$participation, "0=0; 1=1; 2=2; else=3 ")
table(stops$participation, stops$participation4)
table(stops$participation4)





## race subsets
ws <- subset(stops, race==1)
bs <- subset(stops, race==2)
table(ws$white)
table(bs$white)



## ----- FIGURES AND MODELS -------------------

## Figure 1a: Distribution of Investigatory Stops Ratio municipalities

names(stops)
stops$stateabbrev[stops$Illinois == 0] <- "NC"
stops$stateabbrev[stops$Illinois == 1] <- "IL"

table(stops$Illinois, stops$stateabbrev)

stops$citystate <- paste(stops$city, stops$stateabbrev, sep=",")
levels(as.factor(stops$citystate))

collapse <- summaryBy(pctstopinvest_ratio_zero ~ citystate, FUN = (min), data = stops)
names(collapse)[2]  <- "pctstopinvest_ratio_zero"


plotdata <- na.omit(collapse)
mean(plotdata$pctstopinvest_ratio_zero) ##0.1775462 average score among municipalities
table(plotdata$pctstopinvest_ratio_zero) ##see highest and lowest values
plotdata ##find cities/towns attached to highest/lowest values
##highest values: 1.2767 (Cobden, IL), 1.3211956 (East St Louis), 1.4265869 (Macon, NC), 1.4842083 (Chicago)
##five -1 values: Davis, IL; Marine, IL; Omaha, IL; Sherrard, IL; Toulon, IL


fig1a <- ggplot(data = plotdata, aes(pctstopinvest_ratio_zero)) +
  geom_histogram(col = "black", fill = "8c8c8c") +
  ylim(0, 80) + 
  geom_vline(aes(xintercept = mean(pctstopinvest_ratio_zero)), linetype = "dashed", size = 1) +
  annotate("text", x=0.65, y=75, label="mean==0.18", color = "black", size = 7.14, parse = T)+
  labs(x="Ratio") +
  labs(y="Municipalities") +
  theme_bw() +
  theme(axis.text=element_text(size=20), 
        axis.title=element_text(size=25),  legend.title = element_blank(), legend.text = element_text(size = 20)) 


pdf(file="Fig1a.pdf", height=6, width=7)
fig1a
dev.off()



##Figure 1b: Distribution of stopped by police variable from survey data


plotdata2 <- data.frame(prop.table(table(stops$white, stops$stopped), margin=1))
plotdata2$race[plotdata2$Var1 == 0] <- "Black"
plotdata2$race[plotdata2$Var1 == 1] <- "White"

plotdata2$stopped[plotdata2$Var2 == 0] <- "0 times"
plotdata2$stopped[plotdata2$Var2 == 1] <- "1-2 times"
plotdata2$stopped[plotdata2$Var2 == 2] <- "3+ times"

names(plotdata2)[3]  <- "proportion"

##estimate plus or minus the z score val * se

#calculate mean and sd for black rs. black N=328
blkmean <- mean(bs$stopped)
seblack <- sd(bs$stopped)/sqrt(328) 

#calculate mean and sd for white rs. white N=565
whtmean <- mean(ws$stopped)
sewht <- sd(ws$stopped)/sqrt(565) 

plotdata2$N[plotdata2$Var1 == 0] <- 328
plotdata2$N[plotdata2$Var1 == 1] <- 565

plotdata2$se[plotdata2$Var1 == 0] <- 0.04012454
plotdata2$se[plotdata2$Var1 == 1] <- 0.02577547

plotdata2$lwr.80 <- plotdata2$proportion - 1.28 * plotdata2$se
plotdata2$upr.80 <- plotdata2$proportion + 1.28 * plotdata2$se

fig1b <- ggplot(plotdata2, aes(x = stopped, y = proportion, fill = race))+ 
  geom_bar(position = "dodge", stat = "identity") + geom_errorbar(aes(ymin = lwr.80, ymax = upr.80),width = .2, position=position_dodge(.9))+
  labs(x="",y="")+
  scale_y_continuous(labels=scales::percent) +
  scale_fill_manual(values=c("#8c8c8c", "#cccccc")) +
  theme_bw() +
  labs(x="Number of Times Stopped") +
  theme(axis.text=element_text(size=20), 
        axis.title=element_text(size=25),  legend.title = element_blank(), legend.text = element_text(size = 20)) +
  theme(legend.position = c(0.85, .9))

pdf(file="Fig1b.pdf", height=6, width=7)
fig1b
dev.off()


##t test differences between white and black respondents for being stopped at all, and for being stopped the most
prop.table(table(stops$white, stops$stopped.bin), margin=1)
table(stops$stopped.bin)

t.test(stops$stopped.bin ~ stops$white) #p-value = 0.0007973

prop.table(table(stops$white, stops$moststopped), margin=1)

t.test(stops$moststopped ~ stops$white) # p-value = 0.0001316



## Multivariate Regressions ---------------------
# With Controls: victim of a crime; felony conviction; network felony convictions, income, ideology, education, gender, race, state. Random effects for zip and state

evalindex_mixed <- lmer(index ~ pctstopinvest_ratio_zero + as.factor(stopped) + networkfel + victim + felcon  + income + ideo + education + woman + white + age + (1 | Illinois/zip), data = stops)

vote_mixed <- lmer(voted ~ pctstopinvest_ratio_zero + as.factor(stopped) + networkfel + victim + felcon  + income + ideo + education + woman + white + age  + (1 | Illinois/zip), data = stops)

nonvoting_mixed <- lmer(participation4 ~ pctstopinvest_ratio_zero + as.factor(stopped) + networkfel + victim + felcon  + income + ideo + education + woman + white + age  + (1 | Illinois/zip), data = stops)


summary(evalindex_mixed)
summary(vote_mixed)
summary(nonvoting_mixed)



## regression table for models
texreg(list(evalindex_mixed, vote_mixed, nonvoting_mixed),
       custom.coef.names = c(
         "(Intercept)",
         "% Invest. Stops Ratio",
         "Stopped 1-2 times",
         "Stopped 3+ times",
         "Proximal carceral contact",
         "Crime victim",
         "Felony conviction",
         "Income",
         "Conservatism",
         "Education",
         "Woman",
         "White",
         "Age"
       ),
       custom.model.names = c("Evaluation of Police", "Turnout", "Participatory Acts"),
       dcolumn=TRUE, booktabs = TRUE, use.packages = FALSE, label ="tab:regs",  stars = c(0.10, 0.05),   caption = "Policing Context and Personal Stop History on Views of Police and Participation", float.pos = "t", single.row = TRUE) 


## Effect sizes for interpretation: calculated two different ways
summary(evalindex_mixed)

summary(stops$pctstopinvest_ratio_zero) ##look at expected shift in police evaluations for pcststopinvest_ratio_zero variable which has a max value 1.4842. 0 on scale is equal stops by race. Coefficient on variable is -1.00949

-1.00949 * 1.4842 #comparing those with most anti-black to racially equal places
## 1.498285 diff: drops 1.5 going from equal stops to most anti-black


##effect of highest proximal contact
summary(nonvoting_mixed)

#coefficient is 0.13287, networkfel is coded 0, 1, 2
0.13287*2
#0.2657: those with highest contact expected to perform 0.27 additional acts compared to those with no prox contact



